install all packages if needed, then library them. if file normalized_data.rds and filtered_data.rds does not exist, rerun A1.
#install Biocmanager in order to install necessary package
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
#install necessary package if not installed
if (!requireNamespace("GEOmetadb", quietly = TRUE))
BiocManager::install("GEOmetadb")
if (! requireNamespace("GEOquery", quietly = TRUE)) {
BiocManager::install("GEOquery")
}
if (! requireNamespace("edgeR", quietly = TRUE)) {
BiocManager::install("edgeR")
}
# install limma if not exist
if (! requireNamespace("limma", quietly = TRUE)) {
BiocManager::install("limma")
}
if (! requireNamespace("ComplexHeatmap", quietly = TRUE)) {
BiocManager::install("ComplexHeatmap")
}
if (! requireNamespace("circlize", quietly = TRUE)) {
install.packages("circlize")
}
library(dplyr)
library(GEOmetadb)
library(GEOquery)
library(edgeR)
library(knitr)
library(RSQLite)
library(limma)
library(ComplexHeatmap)
library(circlize)
# load all data from A1
if (! file.exists("./normalized_data.rds") || ! file.exists("filtered_data.rds")){
rmarkdown::render("BCB420_A1.Rmd")
}
First, we want to get the description for dataset .
# First, we want to get the description for dataset GSE145412.
gse <- getGEO("GSE145412",GSEMatrix=FALSE)
kable(data.frame(head(Meta(gse))), format = "html")
Then we want to show more information associated with the dataset.
# Then, we want to get information about the dataset
current_gpl <- names(GPLList(gse))[1]
current_gpl_info <- Meta(getGEO(current_gpl))
## Using locally cached version of GPL18573 found here:
## /tmp/Rtmp9GWc8z/GPL18573.soft
The first assignment was focused on clean up data, grouping, and using TMM method to normalized the dataset. the results are shown in both boxplot and density plot.
normalized_data = readRDS("normalized_data.rds")
# boxplot for normalized data
data2plot_after <- log2(normalized_data)
{boxplot(data2plot_after, xlab = "Samples", ylab = "log2 CPM",
las = 2, cex = 0.5, cex.lab = 0.5,
cex.axis = 0.5, main = "MetS/obese RNASeq Samples normalized")
abline(h = median(apply(data2plot_after, 2, median)), col = "green", lwd = 0.6, lty = "dashed")}
# density plot for normalized data
norm_counts_density <- apply(log2(normalized_data), 2, density)
xlim <- 0; ylim <- 0
for (i in 1:length(norm_counts_density)) {
xlim <- range(c(xlim, norm_counts_density[[i]]$x));
ylim <- range(c(ylim, norm_counts_density[[i]]$y))
}
cols <- rainbow(length(norm_counts_density))
ltys <- rep(1, length(norm_counts_density))
# plot the second density plot
plot(norm_counts_density[[1]], xlim=xlim, ylim=ylim, type="n",
ylab="Smoothing density of log2-CPM", main="MetS/obese RNASeq Samples normalized",
cex.lab = 0.85)
# plot each line
for (i in 1:length(norm_counts_density))
lines(norm_counts_density[[i]], col=cols[i], lty=ltys[i])
In this assignment, those genes will be ranked according to differential expression and I will perform enrichment analysis using g:Profiler to see the dominant themes in the top hits. From the results, I will determine if my study supports the original finding.
get the normalized data, and convert it to numerical matrix
# take a look at the data
kable(normalized_data[1:5,1:5], type="html")
| sample_1 | sample_4 | sample_5 | sample_6 | sample_7 | |
|---|---|---|---|---|---|
| 7SK | 1.8665112 | 0.4927562 | 2.3163723 | 0.6257193 | 0.4934407 |
| A1BG-AS1 | 0.2201429 | 0.2269133 | 0.2236347 | 0.1269422 | 0.1814833 |
| A2M-AS1 | 0.0419011 | 0.0575317 | 0.0302224 | 0.0658098 | 0.0579427 |
| A3GALT2 | 0.0448311 | 0.0154599 | 0.0187952 | 0.0399286 | 0.0213537 |
| AAAS | 0.4379074 | 0.4786196 | 0.4105074 | 0.5047566 | 0.4477859 |
heatmap_matrix <- as.matrix(normalized_data)
install required package and library them, then show the heat map
Show the heat map
current_heatmap
Use limma to draw the MDSplot
# MDS plot for the heat map
plotMDS(heatmap_matrix, col = rep(c("darkgreen","blue"),10))
# Draw MDS plot color by patient to see how samples are clustering
pat_colors <- rainbow(4)
pat_colors <- unlist(lapply(pat_colors,FUN=function(x){rep(x,2)}))
plotMDS(heatmap_matrix, col = pat_colors)
Now we want to introduce groups
# Now define groups
samples <- data.frame("type" = c("O", "L", "L", "O", "O", "L", "O",
"L", "L", "O", "L", "L", "L", "L",
"O", "O", "L", "O", "O", "O", "L",
"O", "L", "O", "L", "O", "L", "O",
"O", "L", "L", "O"),
"disease" = c("H", "H", "D", "D", "D", "D", "H",
"H", "H", "D", "H", "H", "D", "D",
"D", "D", "D", "H", "H", "H", "H",
"H", "D", "D", "H", "H", "H", "H",
"D", "D", "D", "D"),
"patients" = c("sample_1", "sample_4", "sample_5",
"sample_6", "sample_7", "sample_8",
"sample_9", "sample_10", "sample_11",
"sample_13", "sample_15", "sample_17",
"sample_18", "sample_19", "sample_21",
"sample_22", "sample_24", "sample_25",
"sample_28", "sample_29", "sample_32",
"sample_33", "sample_34", "sample_35",
"sample_36", "sample_39", "sample_42",
"sample_43", "sample_44", "sample_46",
"sample_47", "sample_48"))
# Add corresponding rownames to each cell
rownames(samples) <- colnames(normalized_data)[1:32]
samples <- data.frame(samples)
samples$name <- paste(samples$disease, samples$type, samples$patients)
colnames(heatmap_matrix) <- samples$name
colnames(normalized_data) <- samples$name
# Show the first 5 rows
samples[1:5,]
Create model matrix, and fit normalized data into the model, only used disease (H - healthy, D - disease) column for analysis
# set model to only disease
model_design <- model.matrix(~ samples$disease)
kable(model_design[1:5,], type="html")
| (Intercept) | samples$diseaseH |
|---|---|
| 1 | 1 |
| 1 | 1 |
| 1 | 0 |
| 1 | 0 |
| 1 | 0 |
expressionMatrix <- as.matrix(normalized_data)
minimalSet <- ExpressionSet(assayData=expressionMatrix)
fit <- lmFit(minimalSet, model_design)
Apply empirical Bayes to compute differential expression
fit2 <- eBayes(fit,trend=TRUE)
output_hits <- topTable(fit2, coef=ncol(model_design), adjust.method = "BH",
number = nrow(expressionMatrix))
kable(output_hits[1:5,],type="html")
| logFC | AveExpr | t | P.Value | adj.P.Val | B | |
|---|---|---|---|---|---|---|
| RP11-517B11.7 | 0.0118532 | 0.0339949 | 4.743916 | 0.0000435 | 0.4633653 | 1.8276821 |
| CACNA1C-AS1 | 0.0249612 | 0.0814327 | 4.279405 | 0.0001637 | 0.4633653 | 0.7380255 |
| CDKN2D | -1.5100146 | 5.5504830 | -4.233263 | 0.0001865 | 0.4633653 | 0.6304486 |
| ARPC1A | -0.2093313 | 1.4309654 | -4.065386 | 0.0002991 | 0.4633653 | 0.2407121 |
| ALOX12 | -0.1625949 | 0.2339444 | -3.938615 | 0.0004261 | 0.4633653 | -0.0515034 |
check how many genes has p value less than 0.05, and how many genes pass the correction
length(which(output_hits$P.Value < 0.05))
## [1] 1630
length(which(output_hits$adj.P.Val < 0.05))
## [1] 0
Question 1:
There are 1630 genes were significantly deferentially expressed, the thresholds I used was 0.05 because it is the most widely accepted p-value, and it means there is a less than 5% chance the result could be random.
there is no gene passes the correction, so now we will try to improve the result.
now we need to correct the p-value for my data using multiple hypothesis correction.
# creates a design matrix with the consideration of both body type and disease
model_design_pat <- model.matrix( ~ samples$type + samples$disease)
kable(model_design_pat,type="html")
| (Intercept) | samples\(typeO| samples\)diseaseH | |
|---|---|---|
| 1 | 1 | 1 |
| 1 | 0 | 1 |
| 1 | 0 | 0 |
| 1 | 1 | 0 |
| 1 | 1 | 0 |
| 1 | 0 | 0 |
| 1 | 1 | 1 |
| 1 | 0 | 1 |
| 1 | 0 | 1 |
| 1 | 1 | 0 |
| 1 | 0 | 1 |
| 1 | 0 | 1 |
| 1 | 0 | 0 |
| 1 | 0 | 0 |
| 1 | 1 | 0 |
| 1 | 1 | 0 |
| 1 | 0 | 0 |
| 1 | 1 | 1 |
| 1 | 1 | 1 |
| 1 | 1 | 1 |
| 1 | 0 | 1 |
| 1 | 1 | 1 |
| 1 | 0 | 0 |
| 1 | 1 | 0 |
| 1 | 0 | 1 |
| 1 | 1 | 1 |
| 1 | 0 | 1 |
| 1 | 1 | 1 |
| 1 | 1 | 0 |
| 1 | 0 | 0 |
| 1 | 0 | 0 |
| 1 | 1 | 0 |
# fit our data
fit_pat <- lmFit(minimalSet, model_design_pat)
# Apply empircal Bayes to compute differential expression
fit2_pat <- eBayes(fit_pat,trend=TRUE)
topfit_pat <- topTable(fit2_pat, coef=ncol(model_design_pat),
adjust.method = "BH",
number = nrow(expressionMatrix))
output_hits_pat <- topfit_pat[order(topfit_pat$P.Value),]
kable(output_hits_pat[1:5,], type="html")
| logFC | AveExpr | t | P.Value | adj.P.Val | B | |
|---|---|---|---|---|---|---|
| RP11-517B11.7 | 0.0118532 | 0.0339949 | 4.938247 | 0.0000269 | 0.4183861 | 2.2350673 |
| ARPC1A | -0.2093313 | 1.4309654 | -4.359947 | 0.0001377 | 0.4378044 | 0.8915791 |
| CACNA1C-AS1 | 0.0249612 | 0.0814327 | 4.221639 | 0.0002027 | 0.4378044 | 0.5719516 |
| CDKN2D | -1.5100146 | 5.5504830 | -4.171077 | 0.0002334 | 0.4378044 | 0.4554521 |
| ZNF880 | 0.0238263 | 0.1217908 | 4.002758 | 0.0003722 | 0.4378044 | 0.0693330 |
check how many genes has p value less than 0.05, and how many genes pass the correction
length(which(output_hits_pat$P.Value < 0.05))
## [1] 1715
length(which(output_hits_pat$adj.P.Val < 0.05))
## [1] 0
Question 2:
I choose to use the BH (Benjamini-Hochberg) method because it is a popular and powerful method for controlling the false discovery rate in the field of science. The number of genes that has p value less than 0.05 increased to 1715, but the number of genes pass correction is still 0.
now, we want to compare the two results by plot
# define colors
simple_model_pvalues <- data.frame(id = rownames(output_hits),
simple_pvalue=output_hits$P.Value)
pat_model_pvalues <- data.frame(id = rownames(output_hits_pat),
patient_pvalue = output_hits_pat$P.Value)
two_models_pvalues <- merge(simple_model_pvalues,
pat_model_pvalues,by.x=1,by.y=1)
two_models_pvalues$colour <- "black"
two_models_pvalues$colour[two_models_pvalues$simple_pvalue<0.05] <- "orange"
two_models_pvalues$colour[two_models_pvalues$patient_pvalue<0.05] <- "blue"
two_models_pvalues$colour[two_models_pvalues$simple_pvalue<0.05 &
two_models_pvalues$patient_pvalue<0.05] <- "red"
# Plotting
plot(two_models_pvalues$simple_pvalue, two_models_pvalues$patient_pvalue,
col = two_models_pvalues$colour, xlab = "simple model p-values",
ylab ="Patient model p-values", main="Simple vs Patient Limma")
# add legend
legend(0, 1,legend=c("simple", "patient", "both", "other"),
col=c("orange", "blue", "red", "black"), lty=1:2, cex=0.9)
The plot looks like a straight line, this suggest that the differential expression for both simple model and patient model gives similar result. the body type of the patients does not effect the result much.
The above result was come from limma, now we want to know the result using edgeR
filtered_data = readRDS("filtered_data.rds")
rownames(filtered_data) <- filtered_data$Group.1
# create a EdgeR object
d = DGEList(counts=as.matrix(filtered_data[2:33]), group=samples$disease)
# create model
model_design_pat <- model.matrix( ~ samples$type + samples$disease)
# estimate dispersion
d <- estimateDisp(d, model_design_pat)
# calculate normalization factors
d <- calcNormFactors(d)
# fit model
fit <- glmQLFit(d, model_design_pat)
# calculate differential expression
qlf.hea_vs_dis <- glmQLFTest(fit)
# get all results
qlf_output_hits <- topTags(qlf.hea_vs_dis,sort.by = "PValue",
n = nrow(filtered_data))
length(which(qlf_output_hits$table$PValue < 0.05)) # 136
## [1] 136
length(which(qlf_output_hits$table$FDR < 0.05)) # 0
## [1] 0
kable(topTags(qlf.hea_vs_dis), type="html")
|
|
|
|
# check how many genes are up regulated
length(which(qlf_output_hits$table$PValue < 0.05 &
qlf_output_hits$table$logFC > 0)) # 5
## [1] 5
# check how many genes are down regulated
length(which(qlf_output_hits$table$PValue < 0.05 &
qlf_output_hits$table$logFC < 0)) # 131
## [1] 131
This means no gene has a FDR value less than 0.05, and only 5 genes are up regulated and 131 genes are down regulated
Compare limma with Quasi liklihood
qlf_pat_model_pvalues <- data.frame(
ensembl_id = rownames(qlf_output_hits$table),
qlf_patient_pvalue=qlf_output_hits$table$PValue)
limma_pat_model_pvalues <- data.frame(
ensembl_id = rownames(output_hits_pat),
limma_patient_pvalue = output_hits_pat$P.Value)
two_models_pvalues <- merge(qlf_pat_model_pvalues,
limma_pat_model_pvalues,
by.x=1,by.y=1)
two_models_pvalues$colour <- "black"
two_models_pvalues$colour[two_models_pvalues$qlf_patient_pvalue<0.05] <- "orange"
two_models_pvalues$colour[two_models_pvalues$limma_patient_pvalue<0.05] <- "blue"
two_models_pvalues$colour[two_models_pvalues$qlf_patient_pvalue<0.05 & two_models_pvalues$limma_patient_pvalue<0.05] <- "red"
plot(two_models_pvalues$qlf_patient_pvalue,
two_models_pvalues$limma_patient_pvalue,
col = two_models_pvalues$colour,
xlab = "QLF patient model p-values",
ylab ="Limma Patient model p-values",
main="QLF vs Limma")
legend(0, 0.9,legend=c("QLF", "Limma", "both QLF and Limma", "other"),
col=c("orange", "blue", "red", "black"), lty=1:2, cex=0.6)
Show the amount of deferentially expressed genes using an MA Plot. I split the data into two groups, one is Healthy group and one is disease group. Since plotMD need to column of data, I calculated the average for each gene in both groups. Then I need to make sure using the right color for significant data, so I reordered the topfit_pat and output_hits. Then I need to define what gene to color, so I introduced a status for coloring. After all the process, I can make the MA plot for my data.
group1 <- normalized_data[, grepl("H O", names(normalized_data))]
group1 <- data.frame(Means=rowMeans(group1))
group2 <- normalized_data[, grepl("D O", names(normalized_data))]
group2 <- data.frame(Means=rowMeans(group2))
combined_group <- cbind(group1, group2)
temp_hit_pat <- topfit_pat[order(rownames(topfit_pat)),]
temp_hit <- output_hits[order(rownames(output_hits)),]
status <- rep(0, nrow(normalized_data))
status[temp_hit$logFC > 0 & temp_hit_pat$P.Value < 0.05] <- 1
status[temp_hit$logFC < 0 & temp_hit_pat$P.Value < 0.05] <- -1
plotMD(log2(combined_group[, c(1, 2)]), status=status, values=c(1,-1), hl.col=c("blue", "red"), ylab="M - ratio log expression", main="Healthy vs Disease", )
Question3:
The gene of interest are highlighted in red and blue colors in the MA plot.
redraw the heat map with the condition p.value < 0.05
top_hits <- rownames(output_hits_pat)[output_hits_pat$P.Value < 0.05]
heatmap_matrix_tophits <- scale(heatmap_matrix[which(rownames(heatmap_matrix) %in% top_hits),])
if(min(heatmap_matrix_tophits) == 0){
heatmap_col = colorRamp2(c( 0, max(heatmap_matrix_tophits)),
c( "white", "red"))
} else {
heatmap_col = colorRamp2(c(min(heatmap_matrix_tophits), 0,
max(heatmap_matrix_tophits)),
c("blue", "white", "red"))
}
current_heatmap <- Heatmap(as.matrix(heatmap_matrix_tophits),
cluster_rows = TRUE,
cluster_columns = TRUE,
show_row_dend = TRUE,
show_column_dend = TRUE,
col=heatmap_col,
show_column_names = TRUE,
show_row_names = FALSE,
show_heatmap_legend = TRUE,
)
print out the heat map
current_heatmap
Now we want to see how the heatmap changes after we add the group condition
top_hits <- rownames(output_hits_pat)[output_hits_pat$P.Value < 0.05]
heatmap_matrix_tophits <- scale(heatmap_matrix[which(rownames(heatmap_matrix) %in% top_hits),])
heatmap_matrix_tophits<- heatmap_matrix_tophits[,
c(grep(colnames(heatmap_matrix_tophits),
pattern = "H"),
grep(colnames(heatmap_matrix_tophits),
pattern = "D"))]
if(min(heatmap_matrix_tophits) == 0){
heatmap_col = colorRamp2(c( 0, max(heatmap_matrix_tophits)),
c( "white", "red"))
} else {
heatmap_col = colorRamp2(c(min(heatmap_matrix_tophits), 0,
max(heatmap_matrix_tophits)),
c("blue", "white", "red"))
}
current_heatmap <- Heatmap(as.matrix(heatmap_matrix_tophits),
cluster_rows = TRUE,
cluster_columns = FALSE,
show_row_dend = TRUE,
show_column_dend = FALSE,
col=heatmap_col,
show_column_names = TRUE,
show_row_names = FALSE,
show_heatmap_legend = TRUE,
)
print out the heat map
current_heatmap
The data shows more clustering after we group the columns by disease, now we will redraw the heatmap with group the columns by disease, and p-value < 0.01.
top_hits <- rownames(output_hits_pat)[output_hits_pat$P.Value < 0.01]
heatmap_matrix_tophits <- scale(heatmap_matrix[which(rownames(heatmap_matrix) %in% top_hits),])
heatmap_matrix_tophits<- heatmap_matrix_tophits[,
c(grep(colnames(heatmap_matrix_tophits),
pattern = "H"),
grep(colnames(heatmap_matrix_tophits),
pattern = "D"))]
if(min(heatmap_matrix_tophits) == 0){
heatmap_col = colorRamp2(c( 0, max(heatmap_matrix_tophits)),
c( "white", "red"))
} else {
heatmap_col = colorRamp2(c(min(heatmap_matrix_tophits), 0,
max(heatmap_matrix_tophits)),
c("blue", "white", "red"))
}
current_heatmap <- Heatmap(as.matrix(heatmap_matrix_tophits),
cluster_rows = TRUE,
cluster_columns = FALSE,
show_row_dend = TRUE,
show_column_dend = FALSE,
col=heatmap_col,
show_column_names = TRUE,
show_row_names = FALSE,
show_heatmap_legend = TRUE,
)
print out the heat map
current_heatmap
The colors more intense than before because we only use the genes with a p-value less than 0.01. The clusters can be clearly seen.
Question 4:
Yes, my conditions cluster together because the clusters on the heatmap can be clearly seen. We can see a clear difference between healthy and disease group after we add the group condition.
Create thresholded lists using edgeR results
qlf_output_hits_withgn <- merge(filtered_data[,1],qlf_output_hits, by.x=1, by.y = 0)
qlf_output_hits_withgn[,"rank"] <- -log(qlf_output_hits_withgn$PValue,base =10) *
sign(qlf_output_hits_withgn$logFC)
qlf_output_hits_withgn <- qlf_output_hits_withgn[order(qlf_output_hits_withgn$rank),]
upregulated_genes <- qlf_output_hits_withgn$x[which(qlf_output_hits_withgn$PValue < 0.05 &
qlf_output_hits_withgn$logFC > 0)]
downregulated_genes <- qlf_output_hits_withgn$x[which(qlf_output_hits_withgn$PValue < 0.05 &
qlf_output_hits_withgn$logFC < 0)]
write.table(x=upregulated_genes,
file=file.path("data","mets_upregulated_genes.txt"),sep = "\t",
row.names = FALSE,col.names = FALSE,quote = FALSE)
write.table(x=downregulated_genes,
file=file.path("data","mets_downregulated_genes.txt"),sep = "\t",
row.names = FALSE,col.names = FALSE,quote = FALSE)
write.table(x=data.frame(genename= qlf_output_hits_withgn$x,F_stat= qlf_output_hits_withgn$rank),
file=file.path("data","mets_ranked_genelist.txt"),sep = "\t",
row.names = FALSE,col.names = FALSE,quote = FALSE)
I choose to use g:Profiler because it is easy to use and can give lots of information about the relative disease, it also has a lot of data sources for use. The Significance threshold was set to BH method.
I chose to use those four dataset because it was used in homework assignment and we are interested in biological pathway and biological process
With threshold less than 0.05, there are 86 gene sets are from up regulated genes, 552 gene sets are from down regulated genes, and 543 gene sets are from differential expression data.
The top result for up-regulated set of genes are establishment of protein localization to endoplasmic reticulum in GO:BP, Ribosome in KEGG, Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC) in REAC and Cytoplasmic Ribosomal Proteins for WP. The top result for down regulated set of genes are neutrophil activation involved in immune response in GO:BP, Chemokine signaling pathway in KEGG, Neutrophil degranulation in REAC and Microglia Pathogen Phagocytosis Pathway for WP. There is not much changes after adding all deferentially expressed genes together since most of the genes are down regulated and only 5 genes are up regulated. so the top result for all deferentially expressed genes and own regulated deferentially expressed genes are same, the results are shown as below.
The over-representation result mechanism discussed the original paper, in the paper, up-regulated genes are at least 440, and down-regulated genes are at least 462. But in my analysis, only 5 genes are up-regulated, and 131 genes are down-regulated. This might because in the original experiment, they split the data into four groups with MetS obese, MetS lean, healthy lean, and healthy obese, and when they do comparison, they will fix one condition and compare the other condition. In my case, obese objects and lean objects are mixed together and looking for the comparison between MetS and healthy.
support: In Andersen et al.(2016), the article stated that MetS and obesity could cause inflammation and trigger immunu responds, this evidence directly supports the finding.
support: in the original document, the RPS24 and RPL34 are both up-regulated and are associated with EIF2 Signaling. The comparison is between healthy obese and haelthy lean object, this means those genes may related to the cause of obese.
Andersen, Catherine J, Kelsey E Murphy, and Maria Luz Fernandez. 2016. “Impact of Obesity and Metabolic Syndrome on Immunity.” Advances in Nutrition 7 (1): 66–75. https://doi.org/10.3945/an.115.010207.
Davis, Sean, and Paul Meltzer. 2007. “GEOquery: A Bridge Between the Gene Expression Omnibus (Geo) and Bioconductor.” Bioinformatics 14: 1846–7.
Gu, Zuguang, Roland Eils, and Matthias Schlesner. 2016. “Complex Heatmaps Reveal Patterns and Correlations in Multidimensional Genomic Data.” Bioinformatics.
Gu, Zuguang, Lei Gu, Roland Eils, Matthias Schlesner, and Benedikt Brors. 2014. “Circlize Implements and Enhances Circular Visualization in R.” Bioinformatics 30 (19): 2811–2.
Jassal, Bijay, Lisa Matthews, Guilherme Viteri, Chuqiao Gong, Pascual Lorente, Antonio Fabregat, Konstantinos Sidiropoulos, et al. 2019. “The Reactome Pathway Knowledgebase.” Nucleic Acids Research. https://doi.org/10.1093/nar/gkz1031.
Kanehisa, Minoru. 2019. “Toward Understanding the Origin and Evolution of Cellular Organisms.” Protein Science 28 (11): 1947–51. https://doi.org/10.1002/pro.3715.
Kanehisa, Minoru, Miho Furumichi, Yoko Sato, Mari Ishiguro-Watanabe, and Mao Tanabe. 2020. “KEGG: Integrating Viruses and Cellular Organisms.” Nucleic Acids Research 49 (D1). https://doi.org/10.1093/nar/gkaa970.
McCarthy, Davis J., Yunshun Chen, and Gordon K. Smyth. 2012. “Differential Expression Analysis of Multifactor Rna-Seq Experiments with Respect to Biological Variation.” Nucleic Acids Research 40 (10): 4288–97. https://doi.org/10.1093/nar/gks042.
Mi, Huaiyu, Anushya Muruganujan, Dustin Ebert, Xiaosong Huang, and Paul D Thomas. 2018. “PANTHER Version 14: More Genomes, a New Panther Go-Slim and Improvements in Enrichment Analysis Tools.” Nucleic Acids Research 47 (D1). https://doi.org/10.1093/nar/gky1038.
Morgan, Martin. 2019. BiocManager: Access the Bioconductor Project Package Repository. https://CRAN.R-project.org/package=BiocManager.
Müller, Kirill, Hadley Wickham, David A. James, and Seth Falcon. 2020. RSQLite: ’SQLite’ Interface for R. https://CRAN.R-project.org/package=RSQLite.
Paczkowska-Abdulsalam, Magdalena, Magdalena Niemira, Agnieszka Bielska, Anna Szałkowska, Beata Anna Raczkowska, Sini Junttila, Attila Gyenesei, et al. 2020. “Evaluation of Transcriptomic Regulations Behind Metabolic Syndrome in Obese and Lean Subjects.” International Journal of Molecular Sciences 21 (4): 1455. https://doi.org/10.3390/ijms21041455.
Reimand, Jüri, Meelis Kull, Hedi Peterson, Jaanus Hansen, and Jaak Vilo. 2007. “G:Profiler—a Web-Based Toolset for Functional Profiling of Gene Lists from Large-Scale Experiments.” Nucleic Acids Research 35. https://doi.org/10.1093/nar/gkm226.
Ritchie, Matthew E, Belinda Phipson, Di Wu, Yifang Hu, Charity W Law, Wei Shi, and Gordon K Smyth. 2015. “limma Powers Differential Expression Analyses for RNA-Sequencing and Microarray Studies.” Nucleic Acids Research 43 (7): e47. https://doi.org/10.1093/nar/gkv007.
Robinson, Mark D, Davis J McCarthy, and Gordon K Smyth. 2010. “EdgeR: A Bioconductor Package for Differential Expression Analysis of Digital Gene Expression Data.” Bioinformatics 26 (1): 139–40. https://doi.org/10.1093/bioinformatics/btp616.
Slenter, Denise N, Martina Kutmon, Kristina Hanspers, Anders Riutta, Jacob Windsor, Nuno Nunes, Jonathan Mélius, et al. 2017. “WikiPathways: A Multifaceted Pathway Database Bridging Metabolomics to Other Omics Research.” Nucleic Acids Research 46 (D1). https://doi.org/10.1093/nar/gkx1064.
Wickham, Hadley, Romain Fran?ois, Lionel Henry, and Kirill Müller. 2020. Dplyr: A Grammar of Data Manipulation. https://CRAN.R-project.org/package=dplyr.
Xie, Yihui. 2020. Knitr: A General-Purpose Package for Dynamic Report Generation in R. https://yihui.org/knitr/.
Zhu, Yuelin, Sean Davis, Robert Stephens, Paul S. Meltzer, and Yidong Chen. 2008. “GEOmetadb: Powerful Alternative Search Engine for the Gene Expression Omnibus.” Bioinformatics (Oxford, England) 24 (23): 2798–2800. https://doi.org/10.1093/bioinformatics/btn520.